package org.example
import org.apache.spark.sql.SparkSession
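
// Loads the MovieLens-style .dat files as raw-text RDDs and prints basic record counts.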
object spark_data1 {
  def main(args: Array[String]): Unit = {
    // Build (or reuse) a SparkSession running locally on all available cores.
    val spark = SparkSession
      .builder()
      .appName("spark_data1")
      .master("local[*]")
      .getOrCreate()
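    // The classic RDD API is reached through the session's underlying SparkContext.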
    val sc = spark.sparkContext
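    // Project-relative directory holding the .dat files; each textFile call below
    // returns a lazily evaluated RDD[String] of raw lines.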
    val filepath = "src/main/java/com/movie/data/"
    val usersRDD = sc.textFile(filepath + "users.dat")
    val occupationsRDD = sc.textFile(filepath + "occupations.dat")
    val ratingsRDD = sc.textFile(filepath + "ratings.dat")
    val moviesRDD = sc.textFile(filepath + "movies.dat")
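    // Cache the raw RDDs so later actions reuse the in-memory data instead of re-reading the files.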
    occupationsRDD.cache()
    usersRDD.cache()
    ratingsRDD.cache()
    moviesRDD.cache()
    println("职业数:" + occupationsRDD.count())
    println("电影数:" + moviesRDD.count())
    println("用户数:" + usersRDD.count())
    println("评分条数:" + ratingsRDD.count)

    spark.stop() // Release the session's resources before exiting.
  }

}
